home *** CD-ROM | disk | FTP | other *** search
/ Mac Easy 2010 May / Mac Life Ubuntu.iso / casper / filesystem.squashfs / usr / share / python-support / python-rdflib / rdflib / store / SQLite.py < prev    next >
Encoding:
Python Source  |  2007-04-04  |  18.4 KB  |  427 lines

  1. from __future__ import generators
  2. from rdflib import BNode
  3. from rdflib.Literal import Literal
  4. from pprint import pprint
  5. from pysqlite2 import dbapi2
  6. import sha,sys,re,os
  7. from rdflib.term_utils import *
  8. from rdflib.Graph import QuotedGraph
  9. from rdflib.store.REGEXMatching import REGEXTerm, NATIVE_REGEX, PYTHON_REGEX
  10. from rdflib.store.AbstractSQLStore import *
#NOTE(review): not referenced anywhere else in this module; presumably the
#conventional "match anything" wildcard for triple patterns - confirm before removing.
Any = None
  12.  
  13. #User-defined REGEXP operator
  14. def regexp(expr, item):
  15.     r = re.compile(expr)
  16.     return r.match(item) is not None
  17.  
  18. class SQLite(AbstractSQLStore):
  19.     """
  20.     SQLite store formula-aware implementation.  It stores its triples in the following partitions:
  21.  
  22.     - Asserted non rdf:type statements
  23.     - Asserted rdf:type statements (in a table which models Class membership)
  24.     The motivation for this partition is primarily query speed and scalability as most graphs will always have more rdf:type statements than others
  25.     - All Quoted statements
  26.  
  27.     In addition it persists namespace mappings in a separate table
  28.     """
  29.     context_aware = True
  30.     formula_aware = True
  31.     transaction_aware = True
  32.     regex_matching = PYTHON_REGEX
  33.     autocommit_default = False
  34.  
  35.     def open(self, home, create=True):
  36.         """
  37.         Opens the store specified by the configuration string. If
  38.         create is True a store will be created if it does not already
  39.         exist. If create is False and a store does not already exist
  40.         an exception is raised. An exception is also raised if a store
  41.         exists, but there is insufficient permissions to open the
  42.         store."""
  43.         if create:
  44.             db = dbapi2.connect(os.path.join(home,self.identifier))
  45.             c=db.cursor()
  46.             c.execute(CREATE_ASSERTED_STATEMENTS_TABLE%(self._internedId))
  47.             c.execute(CREATE_ASSERTED_TYPE_STATEMENTS_TABLE%(self._internedId))
  48.             c.execute(CREATE_QUOTED_STATEMENTS_TABLE%(self._internedId))
  49.             c.execute(CREATE_NS_BINDS_TABLE%(self._internedId))
  50.             c.execute(CREATE_LITERAL_STATEMENTS_TABLE%(self._internedId))
  51.             for tblName,indices in [
  52.                 (
  53.                     "%s_asserted_statements",
  54.                     [
  55.                         ("%s_A_termComb_index",('termComb',)),
  56.                         ("%s_A_s_index",('subject',)),
  57.                         ("%s_A_p_index",('predicate',)),
  58.                         ("%s_A_o_index",('object',)),
  59.                         ("%s_A_c_index",('context',)),
  60.                     ],
  61.                 ),
  62.                 (
  63.                     "%s_type_statements",
  64.                     [
  65.                         ("%s_T_termComb_index",('termComb',)),
  66.                         ("%s_member_index",('member',)),
  67.                         ("%s_klass_index",('klass',)),
  68.                         ("%s_c_index",('context',)),
  69.                     ],
  70.                 ),
  71.                 (
  72.                     "%s_literal_statements",
  73.                     [
  74.                         ("%s_L_termComb_index",('termComb',)),
  75.                         ("%s_L_s_index",('subject',)),
  76.                         ("%s_L_p_index",('predicate',)),
  77.                         ("%s_L_c_index",('context',)),
  78.                     ],
  79.                 ),
  80.                 (
  81.                     "%s_quoted_statements",
  82.                     [
  83.                         ("%s_Q_termComb_index",('termComb',)),
  84.                         ("%s_Q_s_index",('subject',)),
  85.                         ("%s_Q_p_index",('predicate',)),
  86.                         ("%s_Q_o_index",('object',)),
  87.                         ("%s_Q_c_index",('context',)),
  88.                     ],
  89.                 ),
  90.                 (
  91.                     "%s_namespace_binds",
  92.                     [
  93.                         ("%s_uri_index",('uri',)),
  94.                     ],
  95.                 )]:
  96.                 for indexName,columns in indices:
  97.                     c.execute("CREATE INDEX %s on %s (%s)"%(indexName%self._internedId,tblName%(self._internedId),','.join(columns)))
  98.             c.close()
  99.             db.commit()
  100.             db.close()
  101.  
  102.         self._db = dbapi2.connect(os.path.join(home,self.identifier))
  103.         self._db.create_function("regexp", 2, regexp)
  104.  
  105.         if os.path.exists(os.path.join(home,self.identifier)):
  106.             c = self._db.cursor()
  107.             c.execute("SELECT * FROM sqlite_master WHERE type='table'")
  108.             tbls = [rt[1] for rt in c.fetchall()]
  109.             c.close()
  110.             for tn in [tbl%(self._internedId) for tbl in table_name_prefixes]:
  111.                 if tn not in tbls:
  112.                     sys.stderr.write("table %s Doesn't exist\n" % (tn));
  113.                     #The database exists, but one of the partitions doesn't exist
  114.                     return 0
  115.             #Everything is there (the database and the partitions)
  116.             return 1
  117.         #The database doesn't exist - nothing is there
  118.         #return -1
  119.  
  120.     def destroy(self, home):
  121.         """
  122.         FIXME: Add documentation
  123.         """
  124.         db = dbapi2.connect(os.path.join(home,self.identifier))
  125.         c=db.cursor()
  126.         for tblsuffix in table_name_prefixes:
  127.             try:
  128.                 c.execute('DROP table %s'%tblsuffix%(self._internedId))
  129.             except:
  130.                 print "unable to drop table: %s"%(tblsuffix%(self._internedId))
  131.  
  132.         #Note, this only removes the associated tables for the closed world universe given by the identifier
  133.         print "Destroyed Close World Universe %s ( in SQLite database %s)"%(self.identifier,home)
  134.         db.commit()
  135.         c.close()
  136.         db.close()
  137.         os.remove(os.path.join(home,self.identifier))
  138.  
  139.     def EscapeQuotes(self,qstr):
  140.         """
  141.         Ported from Ft.Lib.DbUtil
  142.         """
  143.         if qstr is None:
  144.             return ''
  145.         tmp = qstr.replace("\\","\\\\")
  146.         tmp = tmp.replace('"', '""')
  147.         tmp = tmp.replace("'", "\\'")
  148.         return tmp
  149.  
  150.     #This is overridden to leave unicode terms as is
  151.     #Instead of converting them to ascii (the default behavior)
  152.     def normalizeTerm(self,term):
  153.         if isinstance(term,(QuotedGraph,Graph)):
  154.             return term.identifier
  155.         elif isinstance(term,Literal):
  156.             return self.EscapeQuotes(term)
  157.         elif term is None or isinstance(term,(list,REGEXTerm)):
  158.             return term
  159.         else:
  160.             return term
  161.  
  162.     #Where Clause  utility Functions
  163.     #The predicate and object clause builders are modified in order to optimize
  164.     #subjects and objects utility functions which can take lists as their last argument (object,predicate - respectively)
  165.     def buildSubjClause(self,subject,tableName):
  166.         if isinstance(subject,REGEXTerm):
  167.             return " REGEXP (%s,"+" %s)"%(tableName and '%s.subject'%tableName or 'subject'),[subject]
  168.         elif isinstance(subject,list):
  169.             clauseStrings=[]
  170.             paramStrings = []
  171.             for s in subject:
  172.                 if isinstance(s,REGEXTerm):
  173.                     clauseStrings.append(" REGEXP (%s,"+" %s)"%(tableName and '%s.subject'%tableName or 'subject') + " %s")
  174.                     paramStrings.append(self.normalizeTerm(s))
  175.                 elif isinstance(s,(QuotedGraph,Graph)):
  176.                     clauseStrings.append("%s="%(tableName and '%s.subject'%tableName or 'subject')+"%s")
  177.                     paramStrings.append(self.normalizeTerm(s.identifier))
  178.                 else:
  179.                     clauseStrings.append("%s="%(tableName and '%s.subject'%tableName or 'subject')+"%s")
  180.                     paramStrings.append(self.normalizeTerm(s))
  181.             return '('+ ' or '.join(clauseStrings) + ')', paramStrings
  182.         elif isinstance(subject,(QuotedGraph,Graph)):
  183.             return "%s="%(tableName and '%s.subject'%tableName or 'subject')+"%s",[self.normalizeTerm(subject.identifier)]
  184.         else:
  185.             return subject is not None and "%s="%(tableName and '%s.subject'%tableName or 'subject')+"%s",[subject] or None
  186.  
  187.     #Capable of taking a list of predicates as well (in which case sub clauses are joined with 'OR')
  188.     def buildPredClause(self,predicate,tableName):
  189.         if isinstance(predicate,REGEXTerm):
  190.             return " REGEXP (%s,"+" %s)"%(tableName and '%s.predicate'%tableName or 'predicate'),[predicate]
  191.         elif isinstance(predicate,list):
  192.             clauseStrings=[]
  193.             paramStrings = []
  194.             for p in predicate:
  195.                 if isinstance(p,REGEXTerm):
  196.                     clauseStrings.append(" REGEXP (%s,"+" %s)"%(tableName and '%s.predicate'%tableName or 'predicate'))
  197.                 else:
  198.                     clauseStrings.append("%s="%(tableName and '%s.predicate'%tableName or 'predicate')+"%s")
  199.                 paramStrings.append(self.normalizeTerm(p))
  200.             return '('+ ' or '.join(clauseStrings) + ')', paramStrings
  201.         else:
  202.             return predicate is not None and "%s="%(tableName and '%s.predicate'%tableName or 'predicate')+"%s",[predicate] or None
  203.  
  204.     #Capable of taking a list of objects as well (in which case sub clauses are joined with 'OR')
  205.     def buildObjClause(self,obj,tableName):
  206.         if isinstance(obj,REGEXTerm):
  207.             return " REGEXP (%s,"+" %s)"%(tableName and '%s.object'%tableName or 'object'),[obj]
  208.         elif isinstance(obj,list):
  209.             clauseStrings=[]
  210.             paramStrings = []
  211.             for o in obj:
  212.                 if isinstance(o,REGEXTerm):
  213.                     clauseStrings.append(" REGEXP (%s,"+" %s)"%(tableName and '%s.object'%tableName or 'object'))
  214.                     paramStrings.append(self.normalizeTerm(o))
  215.                 elif isinstance(o,(QuotedGraph,Graph)):
  216.                     clauseStrings.append("%s="%(tableName and '%s.object'%tableName or 'object')+"%s")
  217.                     paramStrings.append(self.normalizeTerm(o.identifier))
  218.                 else:
  219.                     clauseStrings.append("%s="%(tableName and '%s.object'%tableName or 'object')+"%s")
  220.                     paramStrings.append(self.normalizeTerm(o))
  221.             return '('+ ' or '.join(clauseStrings) + ')', paramStrings
  222.         elif isinstance(obj,(QuotedGraph,Graph)):
  223.             return "%s="%(tableName and '%s.object'%tableName or 'object')+"%s",[self.normalizeTerm(obj.identifier)]
  224.         else:
  225.             return obj is not None and "%s="%(tableName and '%s.object'%tableName or 'object')+"%s",[obj] or None
  226.  
  227.     def buildContextClause(self,context,tableName):
  228.         context = context is not None and self.normalizeTerm(context.identifier) or context
  229.         if isinstance(context,REGEXTerm):
  230.             return " REGEXP (%s,"+" %s)"%(tableName and '%s.context'%tableName or 'context'),[context]
  231.         else:
  232.             return context is not None and "%s="%(tableName and '%s.context'%tableName or 'context')+"%s",[context] or None
  233.  
  234.     def buildTypeMemberClause(self,subject,tableName):
  235.         if isinstance(subject,REGEXTerm):
  236.             return " REGEXP (%s,"+" %s)"%(tableName and '%s.member'%tableName or 'member'),[subject]
  237.         elif isinstance(subject,list):
  238.             clauseStrings=[]
  239.             paramStrings = []
  240.             for s in subject:
  241.                 clauseStrings.append("%s.member="%tableName+"%s")
  242.                 if isinstance(s,(QuotedGraph,Graph)):
  243.                     paramStrings.append(self.normalizeTerm(s.identifier))
  244.                 else:
  245.                     paramStrings.append(self.normalizeTerm(s))
  246.             return '('+ ' or '.join(clauseStrings) + ')', paramStrings
  247.         else:
  248.             return subject and u"%s.member = "%(tableName)+"%s",[subject]
  249.  
  250.     def buildTypeClassClause(self,obj,tableName):
  251.         if isinstance(obj,REGEXTerm):
  252.             return " REGEXP (%s,"+" %s)"%(tableName and '%s.klass'%tableName or 'klass'),[obj]
  253.         elif isinstance(obj,list):
  254.             clauseStrings=[]
  255.             paramStrings = []
  256.             for o in obj:
  257.                 clauseStrings.append("%s.klass="%tableName+"%s")
  258.                 if isinstance(o,(QuotedGraph,Graph)):
  259.                     paramStrings.append(self.normalizeTerm(o.identifier))
  260.                 else:
  261.                     paramStrings.append(self.normalizeTerm(o))
  262.             return '('+ ' or '.join(clauseStrings) + ')', paramStrings
  263.         else:
  264.             return obj is not None and "%s.klass = "%tableName+"%s",[obj] or None
  265.  
  266.     def triples(self, (subject, predicate, obj), context=None):
  267.         """
  268.         A generator over all the triples matching pattern. Pattern can
  269.         be any objects for comparing against nodes in the store, for
  270.         example, RegExLiteral, Date? DateRange?
  271.  
  272.         quoted table:                <id>_quoted_statements
  273.         asserted rdf:type table:     <id>_type_statements
  274.         asserted non rdf:type table: <id>_asserted_statements
  275.  
  276.         triple columns: subject,predicate,object,context,termComb,objLanguage,objDatatype
  277.         class membership columns: member,klass,context termComb
  278.  
  279.         FIXME:  These union all selects *may* be further optimized by joins
  280.  
  281.         """
  282.         quoted_table="%s_quoted_statements"%self._internedId
  283.         asserted_table="%s_asserted_statements"%self._internedId
  284.         asserted_type_table="%s_type_statements"%self._internedId
  285.         literal_table = "%s_literal_statements"%self._internedId
  286.         c=self._db.cursor()
  287.  
  288.         parameters = []
  289.  
  290.         if predicate == RDF.type:
  291.             #select from asserted rdf:type partition and quoted table (if a context is specified)
  292.             clauseString,params = self.buildClause('typeTable',subject,RDF.type, obj,context,True)
  293.             parameters.extend(params)
  294.             selects = [
  295.                 (
  296.                   asserted_type_table,
  297.                   'typeTable',
  298.                   clauseString,
  299.                   ASSERTED_TYPE_PARTITION
  300.                 ),
  301.             ]
  302.  
  303.         elif isinstance(predicate,REGEXTerm) and predicate.compiledExpr.match(RDF.type) or not predicate:
  304.             #Select from quoted partition (if context is specified), literal partition if (obj is Literal or None) and asserted non rdf:type partition (if obj is URIRef or None)
  305.             selects = []
  306.             if not self.STRONGLY_TYPED_TERMS or isinstance(obj,Literal) or not obj or (self.STRONGLY_TYPED_TERMS and isinstance(obj,REGEXTerm)):
  307.                 clauseString,params = self.buildClause('literal',subject,predicate,obj,context)
  308.                 parameters.extend(params)
  309.                 selects.append((
  310.                   literal_table,
  311.                   'literal',
  312.                   clauseString,
  313.                   ASSERTED_LITERAL_PARTITION
  314.                 ))
  315.             if not isinstance(obj,Literal) and not (isinstance(obj,REGEXTerm) and self.STRONGLY_TYPED_TERMS) or not obj:
  316.                 clauseString,params = self.buildClause('asserted',subject,predicate,obj,context)
  317.                 parameters.extend(params)
  318.                 selects.append((
  319.                   asserted_table,
  320.                   'asserted',
  321.                   clauseString,
  322.                   ASSERTED_NON_TYPE_PARTITION
  323.                 ))
  324.  
  325.             clauseString,params = self.buildClause('typeTable',subject,RDF.type,obj,context,True)
  326.             parameters.extend(params)
  327.             selects.append(
  328.                 (
  329.                   asserted_type_table,
  330.                   'typeTable',
  331.                   clauseString,
  332.                   ASSERTED_TYPE_PARTITION
  333.                 )
  334.             )
  335.  
  336.  
  337.         elif predicate:
  338.             #select from asserted non rdf:type partition (optionally), quoted partition (if context is speciied), and literal partition (optionally)
  339.             selects = []
  340.             if not self.STRONGLY_TYPED_TERMS or isinstance(obj,Literal) or not obj or (self.STRONGLY_TYPED_TERMS and isinstance(obj,REGEXTerm)):
  341.                 clauseString,params = self.buildClause('literal',subject,predicate,obj,context)
  342.                 parameters.extend(params)
  343.                 selects.append((
  344.                   literal_table,
  345.                   'literal',
  346.                   clauseString,
  347.                   ASSERTED_LITERAL_PARTITION
  348.                 ))
  349.             if not isinstance(obj,Literal) and not (isinstance(obj,REGEXTerm) and self.STRONGLY_TYPED_TERMS) or not obj:
  350.                 clauseString,params = self.buildClause('asserted',subject,predicate,obj,context)
  351.                 parameters.extend(params)
  352.                 selects.append((
  353.                   asserted_table,
  354.                   'asserted',
  355.                   clauseString,
  356.                   ASSERTED_NON_TYPE_PARTITION
  357.                 ))
  358.  
  359.         if context is not None:
  360.             clauseString,params = self.buildClause('quoted',subject,predicate, obj,context)
  361.             parameters.extend(params)
  362.             selects.append(
  363.                 (
  364.                   quoted_table,
  365.                   'quoted',
  366.                   clauseString,
  367.                   QUOTED_PARTITION
  368.                 )
  369.             )
  370.  
  371.         q=self._normalizeSQLCmd(unionSELECT(selects,selectType=TRIPLE_SELECT_NO_ORDER))
  372.         self.executeSQL(c,q,parameters)
  373.         #NOTE: SQLite does not support ORDER BY terms that aren't integers, so the entire result set must be iterated
  374.         #in order to be able to return a generator of contexts
  375.         tripleCoverage = {}
  376.         result = c.fetchall()
  377.         c.close()
  378.         for rt in result:
  379.             s,p,o,(graphKlass,idKlass,graphId) = extractTriple(rt,self,context)
  380.             contexts = tripleCoverage.get((s,p,o),[])
  381.             contexts.append(graphKlass(self,idKlass(graphId)))
  382.             tripleCoverage[(s,p,o)] = contexts
  383.  
  384.         for (s,p,o),contexts in tripleCoverage.items():
  385.             yield (s,p,o),(c for c in contexts)
  386.  
#DDL templates for the tables of a store.  Each template holds a single %s
#that SQLite.open fills with the store's interned identifier to obtain the
#table name.

#Asserted statements whose predicate is not rdf:type
CREATE_ASSERTED_STATEMENTS_TABLE = """
CREATE TABLE %s_asserted_statements (
    subject       text not NULL,
    predicate     text not NULL,
    object        text not NULL,
    context       text not NULL,
    termComb      tinyint unsigned not NULL)"""

#Asserted rdf:type statements, modelled as class membership (member, klass)
CREATE_ASSERTED_TYPE_STATEMENTS_TABLE = """
CREATE TABLE %s_type_statements (
    member        text not NULL,
    klass         text not NULL,
    context       text not NULL,
    termComb      tinyint unsigned not NULL)"""

#Statements stored in the literal partition; the object column is nullable
#and is accompanied by language and datatype columns
CREATE_LITERAL_STATEMENTS_TABLE = """
CREATE TABLE %s_literal_statements (
    subject       text not NULL,
    predicate     text not NULL,
    object        text,
    context       text not NULL,
    termComb      tinyint unsigned not NULL,
    objLanguage   varchar(3),
    objDatatype   text)"""

#Quoted statements; same column layout as the literal partition
CREATE_QUOTED_STATEMENTS_TABLE = """
CREATE TABLE %s_quoted_statements (
    subject       text not NULL,
    predicate     text not NULL,
    object        text,
    context       text not NULL,
    termComb      tinyint unsigned not NULL,
    objLanguage   varchar(3),
    objDatatype   text)"""

#Namespace bindings, keyed by prefix
CREATE_NS_BINDS_TABLE = """
CREATE TABLE %s_namespace_binds (
    prefix        varchar(20) UNIQUE not NULL,
    uri           text,
    PRIMARY KEY (prefix))"""
  427.